home *** CD-ROM | disk | FTP | other *** search
- /* Tools.c The expression parser used by grep.
- */
-
#include "stdio.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "tools.h"
-
- /*
- * This module contains the various routines needed by grep
- * to match regular expressions. Routines are ordered alphabetically.
- */
-
- int amatch( lin, pat, boln )
- char *lin, *boln;
- TOKEN *pat;
- {
-
- register char *bocl, *rval, *strstart;
-
- if (pat == 0)
- return (0);
-
- strstart = lin;
-
- while ( pat )
- {
- if (pat->tok == CLOSURE && pat->next)
- {
-
- pat = pat->next;
-
- bocl = lin;
-
- while ( *lin && omatch(&lin, pat, boln) )
- ;
-
- if (pat = pat->next)
- {
- while ( bocl <= lin )
- {
- if (rval = amatch(lin, pat, boln) )
- {
- return(rval);
- }
- else
- --lin;
- }
- return (0);
- }
- }
- else if ( omatch(&lin, pat, boln) )
- {
- pat = pat->next;
- }
- else
- {
- return (0);
- }
- }
-
- return ( max(strstart, --lin) );
- }
-
-
/*
 * delete -- remove the first occurrence of the character `ch' from the
 * NUL-terminated string `str', closing the gap in place.
 *
 *  ch   character to remove; only its low 8 bits are used.
 *  str  string to edit; left unchanged if `ch' does not occur.
 *
 * Declared int (the type the original implicit declaration gave it) so
 * existing callers and declarations still match; always returns 0.
 */
int delete( ch, str )
int ch;
register char *str;
{
    ch &= 0xff;

    /*
     * Compare byte values: plain char may be signed, in which case a
     * character >= 0x80 would never equal the masked (positive) ch.
     */
    while ( *str && (*str & 0xff) != ch )
        str++;

    /* Shift the tail (including the terminating NUL) left by one. */
    while ( *str )
    {
        *str = *(str + 1);
        str++;
    }

    return (0);
}
-
-
- int dodash(delim, src, dest, maxccl)
-
- int delim, maxccl;
- char **src, *dest;
- {
-
- register char *dstart;
- register int k, at_begin;
- char *sptr;
-
- dstart = dest;
- sptr = *src;
- at_begin = 1;
-
- while ( *sptr && (*sptr != delim) && (dest-dstart <maxccl) )
- {
- if ( *sptr == ESCAPE )
- {
- *dest++ = esc(&sptr);
- sptr++;
- }
-
- else if ( *sptr != '-')
- *dest++ = *sptr++;
-
- else if ( at_begin || *(sptr+1) == delim )
- *dest++ = '-';
-
- else if ( *(sptr -1 ) <= *(sptr+1) )
- {
- sptr++;
-
- for(k= *(sptr-2) ; ++k <= *sptr ;)
- *dest++ = k;
-
- sptr++;
- }
- else
- {
- return (0);
- }
-
- at_begin = 0;
- }
-
- *dest++ = '\000' ;
- *src = sptr;
-
- return (dest - dstart);
- }
-
-
- int esc(s)
- char **s;
- {
- register int rval;
-
- if ( **s != ESCAPE)
- {
- rval = **s;
- }
- else
- {
- (*s)++;
-
- switch( toupper(**s) )
- {
- case '\000': rval = ESCAPE; break;
- case 'S': rval = ' '; break;
- case 'N': rval = '\n'; break;
- case 'T': rval = '\t'; break;
- case 'B': rval = '\b'; break;
- case 'R': rval = '\r'; break;
- default : rval = **s ; break;
-
- }
- }
-
- return (rval);
- }
-
-
- TOKEN *getpat( arg )
- char *arg;
- {
- return ( makepat(arg, '\000' ) );
- }
-
-
/*
 * insert -- put the character `ch' in front of the NUL-terminated
 * string `str', moving the existing contents (and the NUL) one place
 * to the right.  The buffer must have room for one more byte.
 *
 * Declared int (the type the original implicit declaration gave it) so
 * existing callers and declarations still match; always returns 0.
 */
int insert( ch, str )
int ch;
register char *str;
{
    register char *tail;

    /* Find the terminating NUL. */
    tail = str;
    while ( *tail )
        tail++;

    /*
     * Copy right-to-left so nothing is overwritten before it is moved.
     * The loop stops ON the first byte rather than after stepping past
     * it, so the pointer never leaves the buffer.
     */
    for ( ;; )
    {
        *(tail + 1) = *tail;
        if ( tail == str )
            break;
        tail--;
    }

    *str = ch;

    return (0);
}
-
-
/*
 * in_string -- look for a character in a NUL-terminated string.
 *
 *  delim  character to look for; only its low 7 bits are used.
 *  str    string to search.
 *
 * Returns a pointer to the first matching character, or 0 when there
 * is none (searching for NUL therefore always yields 0).
 */
char *in_string( delim, str )
register int delim;
register char *str;
{
    register int wanted;

    wanted = delim & 0x7f;

    for ( ; *str != '\0' ; str++ )
    {
        if ( *str == wanted )
            return (str);
    }

    return (0);
}
-
-
/*
 * isalphanum -- return 1 if `c' is one of a-z, A-Z or 0-9, else 0.
 * The test uses plain character ranges, not the ctype tables.
 */
int isalphanum(c)
int c;
{
    if ( c >= 'a' && c <= 'z' )
        return (1);

    if ( c >= 'A' && c <= 'Z' )
        return (1);

    if ( c >= '0' && c <= '9' )
        return (1);

    return (0);
}
-
-
- TOKEN *makepat(arg, delim)
- char *arg;
- int delim;
- {
-
- TOKEN *head, *tail;
- TOKEN *ntok;
- char buf[CLS_SIZE];
- int error;
-
- if (*arg=='\0' || *arg==delim || *arg=='\n' || *arg==CLOSURE)
- return(0);
-
- error = 0;
- head = 0;
- tail = 0;
-
- while ( *arg && *arg != delim && *arg != '\n' && !error)
- {
- ntok = malloc( TOKSIZE );
- ntok->string = &(ntok->lchar);
- ntok->lchar = '\000';
- ntok->next = 0;
-
- switch(*arg)
- {
- case ANY:
- ntok->tok = ANY;
- break;
-
- case BOL:
-
- if (head==0)
- ntok->tok = BOL;
- else
- error = 1;
- break;
-
- case EOL:
- if ( *(arg+1) == delim || *(arg+1) == '\000'
- || *(arg+1) == '\n' )
- ntok->tok = EOL;
- else
- error = 1;
- break;
-
- case CCL:
- if (*(arg+1) == NEGATE)
- {
- ntok->tok = NCCL;
- arg += 2;
- }
- else
- {
- ntok->tok = CCL;
- arg++;
- }
-
- error = dodash(CCLEND, &arg, buf, CLS_SIZE) ;
- if (error != 0)
- {
- ntok->string = malloc( error );
- strcpy( ntok->string, buf );
- error = 0;
- }
-
- break;
-
- case CLOSURE:
- if ( head != 0)
- {
- switch ( tail->tok )
- {
- case BOL:
- case EOL:
- case CLOSURE:
- return(0);
-
- default:
- ntok->tok = CLOSURE;
- }
- }
- break;
-
- default:
- ntok->tok = LITCHAR;
- ntok->lchar = esc(&arg);
- }
-
- if( error || ntok == 0 )
- {
- unmakepat(head);
- return (0);
- }
-
- else if (head == 0)
- {
- ntok->next = 0;
- head = tail = ntok;
- }
-
- else if (ntok->tok != CLOSURE)
-
- {
- tail->next = ntok;
- ntok->next = tail;
- tail = ntok;
- }
-
- else if (head != tail)
- {
- (tail->next)->next = ntok;
- ntok->next = tail;
- }
-
- else
- {
- ntok->next = head;
- tail->next = ntok;
- head = ntok;
- }
-
- arg++;
- }
-
- tail->next = 0;
- return (head);
- }
-
-
- char *matchs(line, pat, ret_endp)
- char *line;
- TOKEN *pat;
- int ret_endp;
- {
-
- char *rval, *bptr;
-
- bptr = line;
-
- while (*line)
- {
- if ( (rval = amatch(line, pat, bptr)) == 0 )
- {
- line++;
- }
- else
- {
- rval = ret_endp ? rval : line ;
- break;
- }
- }
-
- return (rval);
- }
-
-
/*
 * stoupper -- convert the letters a-z in `str' to upper case, in
 * place.  Characters outside a-z are left alone.
 *
 * Returns `str'.  The return type is spelled out as char *: the
 * original definition had no return type (implicit int) even though it
 * returns the string pointer, which loses bits wherever pointers are
 * wider than int.
 */
char *stoupper(str)
char *str;
{
    char *rval;

    rval = str;

    while ( *str )
    {
        if ( 'a' <= *str && *str <= 'z' )
            *str -= ('a' - 'A');

        str++;
    }

    return (rval);
}
-
-
-
- int omatch (linp, pat, boln)
- char **linp, *boln;
- TOKEN *pat;
- {
-
- register int advance;
-
- advance = -1;
-
- if ( **linp )
- {
- switch ( pat->tok )
- {
- case LITCHAR:
- if ( **linp == pat->lchar )
- advance = 1;
- break;
-
- case BOL:
- if ( *linp == boln )
- advance = 0;
- break;
-
- case ANY:
- if ( **linp != '\n' )
- advance = 1;
- break;
-
- case EOL:
- if ( **linp == '\n' )
- advance = 0;
- break;
-
- case CCL:
- if( in_string (**linp, pat->string) )
- advance = 1;
- break;
-
- case NCCL:
- if ( ! in_string (**linp, pat->string ) )
- advance = 1;
- break;
-
- default:
- printf("omatch: can't happen\n");
- }
- }
-
- if (advance >= 0)
- *linp += advance;
-
- return( ++advance );
- }
-
-
/*
 * pr_line -- write the string `ln' to standard output, making
 * unprintable characters visible.
 *
 * Characters from ' ' through '~' are written as they are.  Anything
 * else is written as \0xNN (two hex digits); a newline is shown that
 * way and then also written as a real newline.
 *
 * Declared int (the type the original implicit declaration gave it) so
 * existing callers and declarations still match; always returns 0.
 */
int pr_line(ln)
register char *ln;
{
    for ( ; *ln ; ln++ )
    {
        if ( (' ' <= *ln) && (*ln <= '~') )
        {
            putchar(*ln);
        }
        else
        {
            /*
             * Mask to one byte: a negative char would otherwise be
             * sign-extended and printed as more than two digits.
             */
            printf("\\0x%02x", *ln & 0xff);

            if ( *ln == '\n' )
                putchar('\n');
        }
    }

    return (0);
}
-
-
- pr_tok(head)
- TOKEN *head;
- {
- register char *str;
-
- for (; head ; head = head->next )
- {
- switch (head->tok)
- {
- case BOL:
- str = "BOL";
- break;
-
- case EOL:
- str = "EOL";
- break;
-
- case ANY:
- str = "ANY";
- break;
-
- case LITCHAR:
- str = "LITCHAR";
- break;
-
- case ESCAPE:
- str = "ESCAPE";
- break;
-
- case CCL:
- str = "CCL";
- break;
-
- case CCLEND:
- str = "CCLEND";
- break;
-
- case NCCL:
- str = "NCCL";
- break;
-
- case CLOSURE:
- str = "CLOSURE";
- break;
-
- default:
- str = "*** unknown ***";
- }
-
- printf("%-8s at: 0x%x, ", str, head);
-
- if (head->tok == CCL || head->tok == NCCL)
- printf ("string = [%s]=, ", head->string );
-
- else if (head->tok == LITCHAR)
- printf("lchar = %c, ", head->lchar);
-
- printf("next = 0x%x\n", head->next);
- }
-
- putchar('\n');
- }
-
-
- unmakepat(head)
- TOKEN *head;
- {
-
- register TOKEN *old_head;
-
- while (head)
- {
- switch (head->tok)
- {
- case CCL:
- case NCCL:
- free(head->string);
-
- default:
- old_head = head;
- head = head->next;
- free(old_head);
- break;
- }
- }
- }
-
-
-